import scrapy
from scrapy import Request
from fangProject.items import FangprojectItem


class fangSpider(scrapy.Spider):
    """Spider that scrapes new-house listings for Zhengzhou (郑州) from fang.com.

    Starts at the listing index page and follows numbered pages b92..b920,
    yielding one ``FangprojectItem`` per house on every page.
    """

    name = "fangSpider"
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 \
        (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36"
    }
    # Current listing page number; the crawl is capped at 20 pages.
    currentPage = 1

    def start_requests(self):
        """Issue the initial request for the first listing page.

        Flow: spider -> Request -> downloader -> Response -> parse.
        """
        url = "https://zz.newhouse.fang.com/house/s/"   # Zhengzhou new-house listings
        yield Request(url, headers=self.headers, callback=self.parse)

    @staticmethod
    def _clean(selected):
        """Join all extracted text fragments and strip surrounding/internal whitespace.

        Fixes two defects of the original inline cleanup:
        - ``replace("\\n", ...)`` matched a literal backslash-n, never a real
          newline; we now strip actual ``"\n"`` / ``"\t"`` characters.
        - ``str(sel.extract())`` stored the list repr (e.g. ``"['4500']"``);
          joining the fragments yields the plain text, and an empty match
          produces ``""`` instead of raising ``IndexError``.
        """
        text = "".join(selected.extract())
        return text.strip().replace("\n", "").replace("\t", "")

    def parse(self, response):
        """Parse one listing page: yield an item per house, then paginate.

        :param response: the downloaded listing page.
        :returns: generator of ``FangprojectItem`` and follow-up ``Request``s.
        """
        for each in response.xpath("//div[@class='nl_con clearfix']/ul/li"):
            item = FangprojectItem()
            # NOTE: "HousePirce" is the field name declared in items.py; keep it.
            item["HouseName"] = self._clean(each.xpath("div/div[2]/div[1]/div[1]/a/text()"))
            item["HousePirce"] = self._clean(each.xpath("div/div[2]/div[5]/span/text()"))
            item["HouseAddress"] = self._clean(each.xpath("div/div[2]/div[3]/div[1]/a/@title"))
            item["HouseComment"] = self._clean(each.xpath("div/div[2]/div[1]/div[2]/a/span/text()"))
            item["HouseTel"] = self._clean(each.xpath("div/div[2]/div[3]/div[2]/p/text()"))
            yield item

        # Follow the next page only while under the 20-page cap.  The original
        # yielded the request unconditionally, re-requesting page b920 forever
        # once the cap was reached (masked only by Scrapy's dedup filter).
        if self.currentPage < 20:
            self.currentPage += 1
            yield scrapy.Request(
                "https://zz.newhouse.fang.com/house/s/b9" + str(self.currentPage) + "/",
                callback=self.parse,
                headers=self.headers,  # keep the custom UA on follow-up pages
            )


